library(Biobase)
library(SummarizedExperiment)
library(openxlsx)
library(stringr)
library(tidyverse)
library(ggplot2)
library(reactable)
library(hypeR)
ggplot2::theme_set(theme(axis.text=element_text(size=12),
                         plot.title=element_text(size=15, vjust=3),
                         plot.margin = unit(c(1,1,1,1), "cm")))
PATH <- file.path(Sys.getenv("MLAB"),"projects/brcameta/exosome/4t1_brca_brain_mets/")
DPATH <- file.path(Sys.getenv("CBM"),"otherStudies/RNAseq/2022-06-03-DenisExosomeBrCaBrainMets/")

do_save <- TRUE

Loading Signatures and Data

all_sigs <- readRDS(file.path(PATH,"data/signatures_symbol.rds"))
all_sigs <- all_sigs[lapply(all_sigs, length) != 0]
names(all_sigs) <- c("IR Control Up", "Control Up", "IR Up", "IR Control Dn", "Control Dn", "IR Down")
deseq_list <- readRDS(file.path(PATH,"data/deseq_list.rds"))

# PAMM Genelists
pamm_paths <- Sys.glob(file.path(PATH, "data/PAMM_Genelists/*"))
pamm_names <- lapply(pamm_paths, stringr::str_match, pattern="genelist-\\s*(.*?)\\s*.xlsx")
pamm_names <- unlist(lapply(pamm_names, function(x) x[2]))

pamm_genelists <- lapply(pamm_paths, openxlsx::read.xlsx, colNames = FALSE)
pamm_genelists <- lapply(pamm_genelists, function(x) x$X2)
names(pamm_genelists) <- pamm_names

hallmark_genesets <- msigdb_download(species="Mus musculus", category="H")

dat <- readRDS(file.path(PATH,"data/4T1_mets_sExp.rds"))
background <- unique(unlist(SummarizedExperiment::rowData(dat)$mgi_symbol))

#Other genesets
methyltransferase_genes <- read.delim(file.path(PATH, "data/methyltransferase_genes_ncbi.txt"))
dnmt_genes <- methyltransferase_genes$Symbol[methyltransferase_genes$Symbol %in% background]

hallmark_genesets$DNMT <- dnmt_genes
h3_meth_genes <- c("PRC1", "PRC2", "EZH2", "EED", "SUZ12", "RING1", "RNF2", "BRD2", "BRD3", "BRD4", "KDM1A", "KDM2A", "KDM3A", "KDM4A", "KDM5A")
library(babelgene)
mouse_h3_meth_genes <- babelgene::orthologs(genes = h3_meth_genes, species = "mouse")
hallmark_genesets$H3METH <- mouse_h3_meth_genes$symbol
names(hallmark_genesets) <- names(hallmark_genesets) %>% str_replace_all(., "_",  " ") %>% str_to_title %>% str_replace_all(., "Hallmark ", "")

hypeR enrichment (HyperGeometric)

Hallmark Mouse

hall_hyp <- hypeR::hypeR(all_sigs,genesets=hallmark_genesets,background=background, test="hypergeometric", fdr=0.05)
hypeR::hyp_dots(hall_hyp,merge=TRUE,top=30)

hypeR::rctbl_build(hall_hyp)

PAMM

pamm_hyp <- hypeR::hypeR(all_sigs,genesets=pamm_genelists,background=background,fdr=0.05)
hypeR::hyp_dots(pamm_hyp,merge=TRUE,top=25)

hypeR::rctbl_build(pamm_hyp)
if (do_save) {
  hypeR::hyp_to_excel(hall_hyp,file.path(PATH,"data/hallmark_hyp.xlsx"))
}

KS-based enrichment

## Add gene symbols to DESeq tables
if (do_save) {
  deseq_list <- lapply(deseq_list, function(Z) {
  as.data.frame(Z) %>% 
    tibble::rownames_to_column(var="ensembl_id") %>%
    dplyr::mutate(geneID=unlist(rowData(dat)[ensembl_id,"mgi_symbol"]))
  })
}
## rank by up-regulation
rank_signatures_up <- lapply(
  deseq_list, function(sig) sig %>% 
    dplyr::filter(geneID!="") %>%
    dplyr::arrange(desc(log2FoldChange)) %>%
    dplyr::select(geneID,log2FoldChange) %>%
    tibble::deframe())
names(rank_signatures_up) <- paste(names(rank_signatures_up),"up",sep="_")

## rank by down-regulation
rank_signatures_dn <- lapply(
  deseq_list,function(sig) sig %>% 
    dplyr::filter(geneID!="") %>%
    dplyr::arrange(log2FoldChange) %>%
    dplyr::select(geneID,log2FoldChange) %>%
    tibble::deframe())
names(rank_signatures_dn) <- paste(names(rank_signatures_dn),"dn",sep="_")

rank_signatures <- c(rank_signatures_up,rank_signatures_dn)
rank_signatures <- rank_signatures[order(names(rank_signatures),decreasing = TRUE)]
names(rank_signatures) <- c("IR Up", "IR Dn", "IR Control Up", "IR Control Dn", "Control Up", "Control Down")

Hallmarks + Pamm

all_genesets <- c(hallmark_genesets, pamm_genelists)
max_fdr <- 0.05
ks_hall <- hypeR::hypeR(signature=rank_signatures,genesets=all_genesets,test="ks",fdr=max_fdr,plotting=TRUE)
hyp_dots(ks_hall,merge=TRUE,fdr=max_fdr/5,top=25) + ggtitle(paste("FDR ≤", max_fdr/5))

hypeR::rctbl_build(ks_hall, show_hmaps=FALSE)

Just IR IS

all_genesets <- c(hallmark_genesets, pamm_genelists)
max_fdr <- 0.05
ks_hall <- hypeR::hypeR(signature=rank_signatures[c("IR Up", "Control Up")],
                        genesets=all_genesets,
                        test="ks",
                        fdr=max_fdr,
                        plotting=TRUE)
hyp_dots(ks_hall,merge=TRUE,fdr=max_fdr,top=30, size_by="none") + ggtitle(paste("FDR ≤", max_fdr))

Just pathways of interest

select_genesets <- c("Hypoxia", "Mitotic Spindle", "Mouse Angiogensis", "Mouse Cancer Stem Cell", "Tgf Beta Signaling", "Mouse EMT genes", "Tnfa Signaling Via Nfkb", "Mtorc1 Signaling")

ks_hall$data$`IR Up`$data <- ks_hall$data$`IR Up`$data %>% filter(label %in% select_genesets)
ks_hall$data$`Control Up`$data <- ks_hall$data$`Control Up`$data %>% filter(label %in% select_genesets)
hyp_dots(ks_hall,merge=TRUE,fdr=max_fdr,top=16, size_by="none") + ggtitle(paste("FDR ≤", max_fdr))

KS Scree Plot

IR IS Up

print(ks_hall$data$`IR Up`$plots[ks_hall$data$`IR Up`$data$fdr<=0.01])
## $`Mouse Angiogensis`

## 
## $Hypoxia

## 
## $`Myc Targets V1`

## 
## $`Mitotic Spindle`

## 
## $`Uv Response Dn`

## 
## $`Mouse EMT genes`

## 
## $`Mouse Cancer Stem Cell`

## 
## $`Tnfa Signaling Via Nfkb`

## 
## $Glycolysis

## 
## $`Oxidative Phosphorylation`

## 
## $`Estrogen Response Early`

## 
## $`Tgf Beta Signaling`

## 
## $`Apical Junction`

## 
## $`Protein Secretion`

## 
## $`Kras Signaling Up`

## 
## $Myogenesis

## 
## $`G2m Checkpoint`

## 
## $`Inflammatory Response`

## 
## $Angiogenesis

## 
## $Coagulation

IR IS Dn

print(ks_hall$data$`IR Dn`$plots[ks_hall$data$`IR Dn`$data$fdr<=0.01])
## NULL

Control IS Up

print(ks_hall$data$`Control Up`$plots[ks_hall$data$`Control Up`$data$fdr<=0.01])
## $`Myc Targets V1`

## 
## $`Epithelial Mesenchymal Transition`

## 
## $`Mtorc1 Signaling`

## 
## $`Uv Response Dn`

## 
## $`Dna Repair`

## 
## $`Tgf Beta Signaling`

## 
## $`Il2 Stat5 Signaling`

## 
## $`Estrogen Response Early`

Control IS Dn

print(ks_hall$data$`Control Down`$plots[ks_hall$data$`Control Down`$data$fdr<=0.01])
## NULL

IR Control Up

print(ks_hall$data$`Control Up`$plots[ks_hall$data$`Control Up`$data$fdr<=0.01])
## $`Myc Targets V1`

## 
## $`Epithelial Mesenchymal Transition`

## 
## $`Mtorc1 Signaling`

## 
## $`Uv Response Dn`

## 
## $`Dna Repair`

## 
## $`Tgf Beta Signaling`

## 
## $`Il2 Stat5 Signaling`

## 
## $`Estrogen Response Early`

IR Control Dn

print(ks_hall$data$`IR Control Up`$plots[ks_hall$data$`IR Control Dn`$data$fdr<=0.01])
## NULL